# Libraries
library(tidyverse)
library(readxl)
library(ggforce)
library(knitr)
library(forcats)

# Add a numeric code for each pitcher's ID label, taken from the label's
# position in the ordered list below: Great = 1, Decent = 2, Bad = 3.
# Any other ID value (including NA) has no match and is coded as NA.
pitcher_test <- mutate(
  pitchers,
  # match() returns integers; coerce so the column stays double
  group_indicator = as.numeric(match(ID, c("Great", "Decent", "Bad")))
)

# Correlation between spin rate and run expectancy, computed only over
# rows where both measurements are present
pitcher_test %>% 
  drop_na(release_spin_rate, run_exp_added) %>% 
  { cor(.$release_spin_rate, .$run_exp_added) }
## [1] 0.01304765
# Correlation between pitch speed and run expectancy, computed only over
# rows where both measurements are present
pitcher_test %>% 
  filter(!is.na(pitch_speed), !is.na(run_exp_added)) %>% 
  with(cor(pitch_speed, run_exp_added))
## [1] -0.008914642
# Can batted ball type predict run expectancy?
# Fit a linear model of run expectancy added on the categorical bb_type
# column and print the fitted coefficients.
lm(
  formula = run_exp_added ~ bb_type,
  data = pitcher_test
)
## 
## Call:
## lm(formula = run_exp_added ~ bb_type, data = pitcher_test)
## 
## Coefficients:
##        (Intercept)  bb_typeground_ball   bb_typeline_drive        bb_typepopup  
##            -0.1025              0.1706             -0.1393              0.3284
# The intercept is the baseline batted-ball type, fly_ball; the other
# coefficients are offsets relative to fly balls.

# Pitchers held out of model fitting and used only for evaluation.
# Defined once so the training and test filters cannot drift apart.
holdout_pitchers <- c("Scherzer, Max", "Taillon, Jameson", "Berríos, José")

# Fit run expectancy on batted-ball type using every pitcher except the
# hold-out set.
test_model <- pitcher_test %>% 
  filter(!(player_name %in% holdout_pitchers)) %>% 
  lm(run_exp_added ~ bb_type, data = .)

# Evaluation rows: the hold-out pitchers' rows that have a recorded
# batted-ball type, reduced to the columns shown alongside the predictions.
test_testdata <- pitcher_test %>% 
  filter(
    player_name %in% holdout_pitchers,
    !is.na(bb_type)
  ) %>% 
  select(ID, pitch_type, run_exp_added, bb_type)

# Attach the model's prediction to each evaluation row. The result is
# printed, not stored.
test_testdata %>% 
  mutate(preds = predict(test_model, newdata = test_testdata))
## # A tibble: 1,442 × 5
##    ID    pitch_type run_exp_added bb_type       preds
##    <chr> <chr>              <dbl> <chr>         <dbl>
##  1 Great SL                 0.207 ground_ball  0.0784
##  2 Great CU                 0.221 ground_ball  0.0784
##  3 Great CH                -1.03  fly_ball    -0.111 
##  4 Great FF                 0.181 fly_ball    -0.111 
##  5 Great FF                -1.66  fly_ball    -0.111 
##  6 Great SL                -0.27  fly_ball    -0.111 
##  7 Great FF                 0.206 ground_ball  0.0784
##  8 Great SL                -0.183 ground_ball  0.0784
##  9 Great SL                 0.406 ground_ball  0.0784
## 10 Great FC                -0.752 ground_ball  0.0784
## # ℹ 1,432 more rows
# row.names = FALSE